In [13]:
import numpy as np
from sklearn import linear_model
from sklearn import datasets
import matplotlib.pyplot as plt
%matplotlib inline

Generalized Linear Models

1) Ordinary Least Squares


In [40]:
dataset = datasets.load_diabetes()
X = dataset.data[:, np.newaxis, 2]       # single feature (column 2, BMI) as a (442, 1) matrix
Y = dataset.target.reshape(-1, 1)        # targets as a (442, 1) column

print(X.shape)
print(Y.shape)


(442, 1)
(442, 1)
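
The next cell fits the line by batch gradient descent on the squared-error loss L(b, W) = sum((Y - b - X·W)^2). Its gradients, dL/db = -2·sum(Y - y_hat) and dL/dW = -2·X^T·(Y - y_hat), are exactly the db and dW terms in the loop.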

In [30]:
# Implementation: fit OLS by batch gradient descent
b = np.zeros((X.shape[1], 1))   # intercept
W = np.zeros((X.shape[1], 1))   # slope
learning_rate = 0.001
iterations = 10000
for _ in range(iterations):
    y_hat = b.T + np.dot(X, W.T)       # predictions, shape (442, 1)
    err = Y - y_hat                    # residuals
    error = np.sum(np.square(err))     # total squared error (for monitoring)

    db = -2*np.sum(err)                           # dL/db
    dW = -2*np.sum(np.dot(X.T, err), axis = 1)    # dL/dW
    
    b = b - learning_rate*db
    W = W - learning_rate*dW

print(b, W)


[[152.13348416]] [[949.43525847]]
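
As a cross-check, OLS also has a closed-form solution via the normal equations, equivalent to what sklearn's LinearRegression computes below; a minimal sketch (A and theta are my names, not from the original):

# Closed-form OLS: prepend a column of ones to X for the intercept,
# then solve (A^T A) theta = A^T Y for theta = [intercept, slope]
A = np.hstack([np.ones((X.shape[0], 1)), X])
theta = np.linalg.solve(A.T @ A, A.T @ Y)
print(theta)   # should match the gradient-descent and sklearn results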

In [17]:
# Using sklearn
# 'normalize' was removed from recent scikit-learn; False was the default anyway
reg = linear_model.LinearRegression(fit_intercept = True, n_jobs = -1)
reg.fit(X, Y)
print(reg.intercept_, reg.coef_)


[152.13348416] [[949.43526038]]

In [29]:
plt.plot(X, Y, 'bo')                 # observed targets
plt.plot(X, y_hat, color = 'red')    # fitted line
plt.xlabel('X values')
plt.ylabel('Target / predicted values')
plt.show()


2) Ridge Regression
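
Ridge regression adds an L2 penalty alpha·||W||^2 to the squared-error loss, shrinking the coefficients toward zero (compare the slope below with the OLS value above). The penalty contributes the extra 2·alpha·W term in dW; the intercept b is left unpenalized.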


In [51]:
# Implementation: same gradient descent, with alpha*||W||^2 added to the loss
b = np.zeros((X.shape[1], 1))   # intercept (not penalized)
W = np.zeros((X.shape[1], 1))   # slope
learning_rate = 0.001
iterations = 10000
alpha = 0.8

for _ in range(iterations):
    y_hat = b.T + np.dot(X, W.T)
    err = Y - y_hat
    error = np.sum(np.square(err)) + alpha*np.sum(np.square(W))   # penalized loss (for monitoring)

    db = -2*np.sum(err)
    dW = -2*np.sum(np.dot(X.T, err), axis = 1) + 2*alpha*W   # penalty gradient: 2*alpha*W
    
    b = b - learning_rate*db
    W = W - learning_rate*dW

print(b, W)


[[152.13348416]] [[527.46403355]]

In [48]:
# Using sklearn
reg = linear_model.Ridge(alpha = 0.8, fit_intercept = True, max_iter = 10000, solver = 'auto')
reg.fit(X, Y)
print(reg.intercept_, reg.coef_)


[152.13348416] [[527.46403355]]
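
Ridge also has a closed form; a minimal sketch on centered data, mirroring how sklearn handles the unpenalized intercept (Xc, Yc, W_closed, b_closed are my names):

# Closed-form ridge: W = (Xc^T Xc + alpha*I)^{-1} Xc^T Yc on centered data,
# with the intercept recovered from the feature/target means
Xc = X - X.mean(axis = 0)
Yc = Y - Y.mean(axis = 0)
W_closed = np.linalg.solve(Xc.T @ Xc + 0.8*np.eye(X.shape[1]), Xc.T @ Yc)
b_closed = Y.mean() - X.mean(axis = 0) @ W_closed
print(b_closed, W_closed)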

In [52]:
plt.plot(X, Y, 'bo')                 # observed targets
plt.plot(X, y_hat, color = 'red')    # fitted ridge line
plt.xlabel('X values')
plt.ylabel('Target / predicted values')
plt.show()



In [57]:
# To cross-validate alpha in ridge regression by hand,
# wrap the implementation above in a loop over candidate alphas
# and keep the alpha with the best held-out score

# In sklearn use RidgeCV
reg = linear_model.RidgeCV(alphas = (0.1, 0.2, 0.3, 0.4), fit_intercept = True, scoring = None, cv = None, gcv_mode = None)
reg.fit(X, Y)
print(reg.intercept_, reg.coef_, reg.alpha_)


[152.13348416] [[863.12296399]] 0.1
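
The hand-rolled loop suggested in the comment could look like this; a minimal sketch using cross_val_score (the 5-fold split and the name best_alpha are my choices):

from sklearn.model_selection import cross_val_score

alphas = (0.1, 0.2, 0.3, 0.4)
# mean 5-fold CV score (R^2 by default) for each candidate alpha
scores = [cross_val_score(linear_model.Ridge(alpha = a), X, Y.ravel(), cv = 5).mean()
          for a in alphas]
best_alpha = alphas[int(np.argmax(scores))]
print(best_alpha)

Note that RidgeCV with cv = None uses efficient leave-one-out generalized cross-validation rather than k-fold, so the selected alpha can differ slightly.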

3) Lasso


In [ ]:
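The notebook stops here. Following the pattern above, a minimal sklearn sketch for lasso (alpha = 0.8 is a placeholder carried over from the ridge cell, not a tuned value):

# Using sklearn: lasso swaps ridge's squared penalty for alpha*|W|,
# which can drive some coefficients exactly to zero
reg = linear_model.Lasso(alpha = 0.8, fit_intercept = True, max_iter = 10000)
reg.fit(X, Y.ravel())
print(reg.intercept_, reg.coef_)

A gradient-descent version in the style of the earlier cells would need a subgradient for the non-differentiable |W| term (e.g. alpha*np.sign(W) in dW); sklearn's Lasso uses coordinate descent instead.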